See tutorial on Item Response Theory (IRT).

Data

library(tidyverse)
library(ggside)
library(easystats)
library(patchwork)
library(mirt)

# Load the preprocessed PHQ-4 questionnaire data (anxiety + depression items)
# and drop participants with no responses at all.
df <- read.csv("https://raw.githubusercontent.com/RealityBending/IllusionGameReliability/main/data/preprocessed_questionnaires.csv") |> 
  select(starts_with("Item_PHQ4")) |> 
  datawizard::remove_empty_rows() 

# Shorten column names by stripping the "Item_PHQ4_" prefix
# (e.g. "Item_PHQ4_Depression_3" -> "Depression_3")
names(df) <- str_remove_all(names(df), "Item_PHQ4_")

Depression

# s <- 'Anxiety = 1,2
#       Depression = 3,4
#       COV = Anxiety*Depression'
# mod1 <- mirt(df, model = mirt.model(s), itemtype = 'graded', SE = TRUE)


# Keep only the depression items of the PHQ-4
data <- select(df, contains("Depression"))

# Unidimensional graded response model (itemtype = 'graded'), with
# standard errors of the parameter estimates
mod1 <- mirt(data, model = 1, itemtype = 'graded', SE = TRUE)
## 
Iteration: 1, Log-Lik: -23.728, Max-Change: 0.54927
Iteration: 2, Log-Lik: -22.543, Max-Change: 0.88411
Iteration: 3, Log-Lik: -21.267, Max-Change: 1.26194
Iteration: 4, Log-Lik: -19.186, Max-Change: 0.87383
Iteration: 5, Log-Lik: -19.099, Max-Change: 0.66498
Iteration: 6, Log-Lik: -19.057, Max-Change: 0.45699
Iteration: 7, Log-Lik: -19.015, Max-Change: 0.13119
Iteration: 8, Log-Lik: -19.009, Max-Change: 0.10175
Iteration: 9, Log-Lik: -19.004, Max-Change: 0.07540
Iteration: 10, Log-Lik: -18.997, Max-Change: 0.03874
Iteration: 11, Log-Lik: -18.996, Max-Change: 0.03387
Iteration: 12, Log-Lik: -18.995, Max-Change: 0.02976
Iteration: 13, Log-Lik: -18.992, Max-Change: 0.00553
Iteration: 14, Log-Lik: -18.992, Max-Change: 0.00469
Iteration: 15, Log-Lik: -18.992, Max-Change: 0.00424
Iteration: 16, Log-Lik: -18.992, Max-Change: 0.00200
Iteration: 17, Log-Lik: -18.992, Max-Change: 0.00184
Iteration: 18, Log-Lik: -18.992, Max-Change: 0.00174
Iteration: 19, Log-Lik: -18.992, Max-Change: 0.00121
Iteration: 20, Log-Lik: -18.992, Max-Change: 0.00115
Iteration: 21, Log-Lik: -18.992, Max-Change: 0.00109
Iteration: 22, Log-Lik: -18.992, Max-Change: 0.00077
Iteration: 23, Log-Lik: -18.992, Max-Change: 0.00073
Iteration: 24, Log-Lik: -18.992, Max-Change: 0.00069
Iteration: 25, Log-Lik: -18.992, Max-Change: 0.00018
Iteration: 26, Log-Lik: -18.992, Max-Change: 0.00019
Iteration: 27, Log-Lik: -18.992, Max-Change: 0.00018
Iteration: 28, Log-Lik: -18.992, Max-Change: 0.00014
Iteration: 29, Log-Lik: -18.992, Max-Change: 0.00013
Iteration: 30, Log-Lik: -18.992, Max-Change: 0.00013
Iteration: 31, Log-Lik: -18.992, Max-Change: 0.00012
Iteration: 32, Log-Lik: -18.992, Max-Change: 0.00012
Iteration: 33, Log-Lik: -18.992, Max-Change: 0.00011
Iteration: 34, Log-Lik: -18.992, Max-Change: 0.00010
Iteration: 35, Log-Lik: -18.992, Max-Change: 0.00010
## 
## Calculating information matrix...

# Global model fit (C2 statistic); kept commented out for reference
# M2(mod1, type = "C2", calcNULL = FALSE)

# Item-level fit statistics (S-X2)
itemfit(mod1)
##           item S_X2 df.S_X2 RMSEA.S_X2 p.S_X2
## 1 Depression_3 28.7       1       1.86      0
## 2 Depression_4  NaN       0        NaN    NaN

# IRT parameters: discrimination (a) and category thresholds (b1, b2, ...)
IRT_parms <- coef(mod1, IRTpars = TRUE, simplify = TRUE)
IRT_parms$items
##                 a     b1    b2    b3   b4
## Depression_3 8.61 -0.477 0.350 1.334   NA
## Depression_4 7.95 -0.479 0.351 0.786 1.34

# Factor solution: standardized loadings (F1) and communalities (h2)
summary(mod1)
##                 F1    h2
## Depression_3 0.981 0.962
## Depression_4 0.978 0.956
## 
## SS loadings:  1.92 
## Proportion Var:  0.959 
## 
## Factor correlations: 
## 
##    F1
## F1  1

# Plots
plot(mod1, type = 'trace', theta_lim = c(-3, 3))      # category response curves

plot(mod1, type = 'infotrace', theta_lim = c(-3, 3))  # item information curves

plot(mod1, type = 'score', theta_lim = c(-3, 3))      # expected total score

plot(mod1, type = 'infoSE', theta_lim = c(-3, 3))     # test information and SE

plot(mod1, type = 'rxx', theta_lim = c(-3, 3))        # marginal reliability

Category Response Curves (CRC) and Item Information Curves

Typically, an optimally informative item will have a high discrimination (a) and broad category coverage (as indicated by its location parameters) across the range of theta.

#' Category Response Curves (CRC) and normalized item information
#'
#' Computes, for every item of a fitted (graded) IRT model, the category
#' response probabilities and the (normalized) item information over theta,
#' and assembles them into a faceted plot.
#'
#' @param mod A fitted mirt model.
#' @param data The data frame the model was fitted on (used for item names).
#' @param x Theta locations at which dashed vertical reference lines are drawn.
#' @return A list with `p` (the ggplot) and `rez` (the long-format data).
crc <- function(mod, data, x = c(-1.25, 2.25)) {
  Theta <- matrix(seq(-3, 3, by = .01))
  rez <- data.frame()
  # Loop over all items (not a hard-coded 2) and use the `mod` argument;
  # the original body mistakenly referenced the global `mod1`, so calling
  # crc() on another model silently re-plotted the depression model.
  for (i in seq_len(ncol(data))) {
    dat <- as.data.frame(probtrace(extract.item(mod, i), Theta))
    dat$Theta <- Theta[, 1]
    dat$Information <- normalize(iteminfo(extract.item(mod, i), Theta))
    dat <- pivot_longer(dat, -one_of(c("Theta", "Information")),
                        names_to = "Answer", values_to = "Probability")
    dat$Item <- names(data)[i]
    rez <- rbind(rez, dat)
  }

  # Relabel the raw response categories (P.1 ... P.5) with the PHQ-4 answer
  # options and fix their display order.
  rez <- rez |>
    mutate(
      Answer = case_when(
        Answer == "P.1" ~ "Not at all",
        Answer == "P.2" ~ "Once or twice",
        Answer == "P.3" ~ "Several days",
        Answer == "P.4" ~ "More than half the days",
        TRUE ~ "Nearly every day"
      )
    ) |>
    mutate(
      Answer = fct_relevel(Answer, c(
        "Not at all", "Once or twice", "Several days",
        "More than half the days", "Nearly every day"
      ))
    )

  p <- rez |>
    ggplot(aes(x = Theta, y = Probability)) +
    geom_line(aes(y = Information), linetype = "dotted", color = "grey") +
    geom_line(aes(color = Answer)) +
    geom_vline(xintercept = x, linetype = "dashed") +
    facet_grid(~Item) +
    theme_modern()
  list(p = p, rez = rez)
}

# CRCs for the depression items; dashed lines mark the theta reference points
out <- crc(mod1, data, x = c(-1.25, 2.25))
out$p + labs(x = expression(Depression~(theta))) 

Normalized scoring

# Restrict the curves to item Depression_4 on a trimmed theta window,
# then rescale theta to [0, 1].
item4 <- out$rez |>
  filter(Item == "Depression_4", Theta >= -1.5, Theta <= 2.25) |>
  mutate(Theta = normalize(Theta))

# For each answer option, keep the row(s) where its probability peaks:
# these peak locations serve as the "normalized scores".
scores <- item4 |>
  group_by(Answer) |>
  filter(Probability == max(Probability)) |>
  ungroup()

# Visualize the rescaled category response curves
item4 |>
  ggplot(aes(x = Theta, y = Probability, color = Answer)) +
  geom_line() +
  theme_modern()

Anxiety

# Keep only the anxiety items of the PHQ-4
data <- select(df, contains("Anxiety"))

# Unidimensional graded response model (itemtype = 'graded'), with
# standard errors of the parameter estimates
m_anxiety <- mirt(data, model = 1, itemtype = 'graded', SE = TRUE)
## 
Iteration: 1, Log-Lik: -19.484, Max-Change: 0.67923
Iteration: 2, Log-Lik: -18.134, Max-Change: 1.03571
Iteration: 3, Log-Lik: -16.679, Max-Change: 1.51219
Iteration: 4, Log-Lik: -14.704, Max-Change: 1.93439
Iteration: 5, Log-Lik: -14.462, Max-Change: 2.31611
Iteration: 6, Log-Lik: -14.307, Max-Change: 2.43061
Iteration: 7, Log-Lik: -14.021, Max-Change: 2.30202
Iteration: 8, Log-Lik: -14.006, Max-Change: 2.65289
Iteration: 9, Log-Lik: -13.987, Max-Change: 2.75531
Iteration: 10, Log-Lik: -13.821, Max-Change: 1.50400
Iteration: 11, Log-Lik: -13.805, Max-Change: 0.81955
Iteration: 12, Log-Lik: -13.801, Max-Change: 0.90549
Iteration: 13, Log-Lik: -13.785, Max-Change: 1.70550
Iteration: 14, Log-Lik: -13.779, Max-Change: 0.51799
Iteration: 15, Log-Lik: -13.778, Max-Change: 0.64409
Iteration: 16, Log-Lik: -13.770, Max-Change: 0.59843
Iteration: 17, Log-Lik: -13.769, Max-Change: 0.80317
Iteration: 18, Log-Lik: -13.767, Max-Change: 0.33958
Iteration: 19, Log-Lik: -13.767, Max-Change: 1.82883
Iteration: 20, Log-Lik: -13.765, Max-Change: 0.36195
Iteration: 21, Log-Lik: -13.765, Max-Change: 0.46691
Iteration: 22, Log-Lik: -13.763, Max-Change: 0.48612
Iteration: 23, Log-Lik: -13.762, Max-Change: 0.60732
Iteration: 24, Log-Lik: -13.762, Max-Change: 0.30250
Iteration: 25, Log-Lik: -13.762, Max-Change: 0.66072
Iteration: 26, Log-Lik: -13.761, Max-Change: 0.29525
Iteration: 27, Log-Lik: -13.761, Max-Change: 0.35435
Iteration: 28, Log-Lik: -13.760, Max-Change: 0.55548
Iteration: 29, Log-Lik: -13.759, Max-Change: 0.46850
Iteration: 30, Log-Lik: -13.759, Max-Change: 0.28578
Iteration: 31, Log-Lik: -13.758, Max-Change: 0.26341
Iteration: 32, Log-Lik: -13.758, Max-Change: 0.25786
Iteration: 33, Log-Lik: -13.758, Max-Change: 0.30994
Iteration: 34, Log-Lik: -13.757, Max-Change: 0.53972
Iteration: 35, Log-Lik: -13.757, Max-Change: 0.48032
Iteration: 36, Log-Lik: -13.757, Max-Change: 0.30180
Iteration: 37, Log-Lik: -13.756, Max-Change: 0.32014
Iteration: 38, Log-Lik: -13.756, Max-Change: 0.41981
Iteration: 39, Log-Lik: -13.756, Max-Change: 0.32593
Iteration: 40, Log-Lik: -13.756, Max-Change: 0.37159
Iteration: 41, Log-Lik: -13.755, Max-Change: 0.40391
Iteration: 42, Log-Lik: -13.755, Max-Change: 0.32229
Iteration: 43, Log-Lik: -13.755, Max-Change: 0.25980
Iteration: 44, Log-Lik: -13.755, Max-Change: 0.25479
Iteration: 45, Log-Lik: -13.754, Max-Change: 0.22322
Iteration: 46, Log-Lik: -13.754, Max-Change: 0.24019
Iteration: 47, Log-Lik: -13.754, Max-Change: 0.39856
Iteration: 48, Log-Lik: -13.754, Max-Change: 0.76429
Iteration: 49, Log-Lik: -13.753, Max-Change: 1.35886
Iteration: 50, Log-Lik: -13.753, Max-Change: 0.21730
Iteration: 51, Log-Lik: -13.753, Max-Change: 0.21397
Iteration: 52, Log-Lik: -13.753, Max-Change: 0.36451
Iteration: 53, Log-Lik: -13.753, Max-Change: 1.10765
Iteration: 54, Log-Lik: -13.752, Max-Change: 0.80621
Iteration: 55, Log-Lik: -13.752, Max-Change: 0.23646
Iteration: 56, Log-Lik: -13.752, Max-Change: 0.12858
Iteration: 57, Log-Lik: -13.752, Max-Change: 0.12158
Iteration: 58, Log-Lik: -13.752, Max-Change: 0.09857
Iteration: 59, Log-Lik: -13.752, Max-Change: 0.03461
Iteration: 60, Log-Lik: -13.752, Max-Change: 0.03430
Iteration: 61, Log-Lik: -13.752, Max-Change: 0.16160
Iteration: 62, Log-Lik: -13.752, Max-Change: 0.18197
Iteration: 63, Log-Lik: -13.752, Max-Change: 0.12825
Iteration: 64, Log-Lik: -13.752, Max-Change: 0.10943
Iteration: 65, Log-Lik: -13.752, Max-Change: 0.03532
Iteration: 66, Log-Lik: -13.752, Max-Change: 0.03592
Iteration: 67, Log-Lik: -13.752, Max-Change: 0.09662
Iteration: 68, Log-Lik: -13.752, Max-Change: 0.09634
Iteration: 69, Log-Lik: -13.752, Max-Change: 0.09158
Iteration: 70, Log-Lik: -13.752, Max-Change: 0.04876
Iteration: 71, Log-Lik: -13.752, Max-Change: 0.09551
Iteration: 72, Log-Lik: -13.752, Max-Change: 0.15216
Iteration: 73, Log-Lik: -13.751, Max-Change: 0.11593
Iteration: 74, Log-Lik: -13.751, Max-Change: 0.12471
Iteration: 75, Log-Lik: -13.751, Max-Change: 0.13171
Iteration: 76, Log-Lik: -13.751, Max-Change: 0.18460
Iteration: 77, Log-Lik: -13.751, Max-Change: 0.00088
Iteration: 78, Log-Lik: -13.751, Max-Change: 0.00082
Iteration: 79, Log-Lik: -13.751, Max-Change: 0.20522
Iteration: 80, Log-Lik: -13.751, Max-Change: 0.00089
Iteration: 81, Log-Lik: -13.751, Max-Change: 0.00083
Iteration: 82, Log-Lik: -13.751, Max-Change: 0.20668
Iteration: 83, Log-Lik: -13.751, Max-Change: 0.00078
Iteration: 84, Log-Lik: -13.751, Max-Change: 0.12739
Iteration: 85, Log-Lik: -13.751, Max-Change: 0.21932
Iteration: 86, Log-Lik: -13.751, Max-Change: 0.00078
Iteration: 87, Log-Lik: -13.751, Max-Change: 0.00072
Iteration: 88, Log-Lik: -13.751, Max-Change: 0.12902
Iteration: 89, Log-Lik: -13.751, Max-Change: 0.00006
## 
## Calculating information matrix...

# Global model fit (C2 statistic); kept commented out for reference
# M2(m_anxiety, type = "C2", calcNULL = FALSE)

# Item-level fit statistics (S-X2)
itemfit(m_anxiety)
##        item S_X2 df.S_X2 RMSEA.S_X2 p.S_X2
## 1 Anxiety_1 14.7       1       1.31      0
## 2 Anxiety_2  NaN       0        NaN    NaN

# IRT parameters: discrimination (a) and category thresholds (b1, b2, ...)
IRT_parms <- coef(m_anxiety, IRTpars = TRUE, simplify = TRUE)
IRT_parms$items
##              a     b1    b2  b3
## Anxiety_1 75.3 -0.899 0.781  NA
## Anxiety_2 72.4 -0.899 0.103 1.2

# Factor solution: standardized loadings (F1) and communalities (h2)
summary(m_anxiety)
##           F1    h2
## Anxiety_1  1 0.999
## Anxiety_2  1 0.999
## 
## SS loadings:  2 
## Proportion Var:  0.999 
## 
## Factor correlations: 
## 
##    F1
## F1  1

# Plots
plot(m_anxiety, type = 'score', theta_lim = c(-3, 3))   # expected total score

plot(m_anxiety, type = 'infoSE', theta_lim = c(-3, 3))  # test information and SE

plot(m_anxiety, type = 'rxx', theta_lim = c(-3, 3))     # marginal reliability

Category Response Curves (CRC) and Item Information Curves

# CRCs for the anxiety items; dashed lines mark the theta reference points
out <- crc(m_anxiety, data, x = c(-1.25, 2.25))
out$p + labs(x = expression(Anxiety~(theta)))